OHI-Northeast | OHI Science | Citation policy
This layer is derived from EPA Beach Closure data. We use the number of days a beach is closed per year due to pathogens in the water as a proxy for the impact of pathogens in coastal waters. Data is provided at the beach level, aggregated to county and then again aggregated to the region level.
The highest observed pressure score was 9.5% in Rhode Island. Indicating, on average a given beach would be closed 9.5% of the year. This high result was largely driven to the Atlantic Beach Club beach being closed more than 80 days in 2006.
knitr::opts_chunk$set(fig.width = 10, fig.height = 6, fig.path = 'figs/', message = FALSE, warning = FALSE)
source('~/github/ne-prep/src/R/common.R')
library(tidyverse)# data for all states exept New York
df <- read_csv('data/beach_actions_(advisories_and_closures).csv') %>%
mutate(state =
case_when(
State == "MA" ~ "Massachusetts",
State == "CT" ~ "Connecticut",
State == "ME" ~ "Maine",
State == "NH" ~ "New Hampshire",
State == "RI" ~ "Rhode Island"
)) %>%
select(-State) #removing state abbreviation columnI downloaded the New York beaches dataset on its own. We have to filter out beaches from the great lakes and finger lakes regions.
ny <- read_csv('data/beach_actions_(advisories_and_closures)_NY.csv') %>%
filter(County %in% c('BRONX','QUEENS','KINGS','SUFFOLK','NASSAU','RICHMOND','WESTCHESTER')) %>% #ocean counties
mutate(state = "New York") %>%
select(-State) #removing state abbreviation columnSince Massachussetts has state waters divided into two regions, we have to manually assign counties to the Gulf of Maine and Virginian regions
split <- df %>%
filter(County %in% c('BARNSTABLE','PLYMOUTH')) %>%
select(state, County, `Beach Name`) %>%
unique()
nrow(split)## [1] 197
write.csv(split,file = 'data/ma_beaches.csv')There are 197 beaches in these two counties that require manual matching. Using the BEACON map viewer, I matched each beach in Plymouth and Barnstable Counties to either Region 7 or 8.
Now combine all beaches to rgn_ids
mass_bch <- read_csv('data/ma_beaches_rgn_id.csv')[, 1:4] %>%
mutate(state = "Massachusetts") %>%
select(-State)
mass_cnty <- read_csv('~/github/ne-prep/src/tables/MA_counties.csv')[, 2:4]
mass_cnty$County = toupper(mass_cnty$County)
df_pb <- df %>%
filter(state == 'Massachusetts' & County %in% c('BARNSTABLE', 'PLYMOUTH')) %>%
left_join(mass_bch, by = c('state', 'County', 'Beach Name')) %>%
select(state, County, Year, `Beach Name`, ActionStartDate, ActionEndDate, `ActionDuration Days`, rgn_id)
df_ma <- df %>%
filter(state == 'Massachusetts'&
!County %in% c('BARNSTABLE', 'PLYMOUTH')) %>%
left_join(mass_cnty, by = 'County') %>%
select(state, County, Year, `Beach Name`, ActionStartDate, ActionEndDate, `ActionDuration Days`, rgn_id)
#matching rgn_id to non MASS data
df_rest <- df %>%
rbind(ny) %>% #add in New York
filter(state != 'Massachusetts') %>%
left_join(rgn_data, by = "state") %>%
select(state, County, Year, `Beach Name`, ActionStartDate, ActionEndDate, `ActionDuration Days`, rgn_id)
df_all = rbind(df_pb, df_ma, df_rest) %>%
left_join(rgn_data, by = c('rgn_id', 'state')) library(trelliscopejs)
#percent of days beach is closed by county
perc <- df_all %>%
unique() %>% #some weird duplicates
group_by(`Beach Name`, Year, County, rgn_id, rgn_name) %>%
summarize(days_closed = sum(`ActionDuration Days`)) %>%
mutate(perc_closed = days_closed/365) %>%
ungroup() %>%
group_by(County, rgn_id, Year,rgn_name) %>%
summarize(perc_closed = mean(perc_closed))
ggplot(perc, aes(x = Year, y = perc_closed, color = `County`)) +
geom_line() +
theme_bw() +
trelliscopejs::facet_trelliscope(~rgn_name, self_contained = TRUE, scales = "free")I think taking the average makes the most sense, especially since there are differences in number of sites between regions and years (although not that varied).
ggplot(perc, aes(x = Year)) +
geom_bar(position = 'dodge') +
facet_wrap(~rgn_name) +
theme_bw() +
labs(y = "Number of sites",
title = "Site sampling per region")ggplot(perc %>%
group_by(rgn_name, rgn_id, Year) %>%
summarize(perc_closure = mean(perc_closed)*100),
aes(x = Year, y = perc_closure, color = rgn_name)) +
geom_line() +
labs(title = "Beach Closures (mean annual proportion any given beach is closed)",
y = "Proportion (%) of year",
color = "Region") +
theme_bw()The Clean Waters goal will use this data to measure how clean the water is. For this layer we want the final values to be the inverse of closures, so the amount of time each year that the region has beaches open due to clean water from pathogens. We need to add in the other offshore regions and assign NA for the toolbox layer.
other_rgns <- data.frame(year = rep(2005:2016, each = 4),
rgn_name = c("Offshore", "Georges Bank", "Gulf of Maine", "Mid-Atlantic Bight"),
rgn_id = c(1,2,3,4),
perc_open = NA)
perc %>%
group_by(rgn_name, rgn_id, Year) %>%
summarize(perc_open = mean(1-perc_closed)) %>%
rename(year = Year) %>%
bind_rows(other_rgns) %>%
write.csv(file.path(dir_calc, "layers/cw_pathogens.csv"))This data is also used as a pressure in the Index. But since pressure values go from 0 (low pressure) to 1 (highest pressure) we want to use the annual average proportion of beach closures with 100% being the highest (and worst) value possible.
other_rgns <- data.frame(year = rep(2005:2016, each = 4),
rgn_name = c("Offshore", "Georges Bank", "Gulf of Maine", "Mid-Atlantic Bight"),
rgn_id = c(1,2,3,4),
perc_closed = NA)
perc %>%
group_by(rgn_name, rgn_id, Year) %>%
summarize(perc_closed = mean(perc_closed)) %>%
select(-rgn_name) %>% #remove rgn_name from layer for toolbox
rename(year = Year) %>%
write.csv(file.path(dir_calc, "layers/prs_pathogens.csv")) #saved as prs_pathogens b/c this is a pressure